Preliminaries: load required packages and perform merges

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: Hmisc
## 
## Loading required package: lattice
## 
## Loading required package: survival
## 
## Loading required package: Formula
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## 
## Attaching package: 'caret'
## 
## 
## The following object is masked from 'package:survival':
## 
##     cluster
## 
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## 
## 
## Attaching package: 'xgboost'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     slice
## 
## 
## Loading required package: foreach
## 
## 
## Attaching package: 'foreach'
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## 
## Loading required package: iterators
## 
## Loading required package: parallel
## 
## 
## Attaching package: 'pdp'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     partial

create weight manually

rm(list = ls())
library(tidyverse)
library(readxl)
filenames <- list.files("./Ranked Measure Data2")
filenames
## [1] "2015.xls" "2016.xls" "2017.xls" "2018.xls" "2019.xls"
RMD <- data.frame(matrix(ncol = 0, nrow = 0))
for (filename in filenames){  
   yearnum <- gsub(".xls", "", filename)  
   RMD = RMD %>% bind_rows(assign(paste0("RMD",yearnum), read_excel(paste0("./Ranked Measure Data2/", filename), sheet="Ranked Measure Data", skip = 1) %>%
            mutate(year=yearnum) %>% 
            select(FIPS, year, YPLLRateLow, YPLLRateHigh)))
}
## Warning: Expecting numeric in CB3145 / R3145C80: got '^This data was updated on March 29, 2017. Please see http://www.countyhealthrankings.org/content/data-changes for more information.'
## New names:
## • `Quartile` -> `Quartile...8`
## • `Sample Size` -> `Sample Size...9`
## • `95% CI - Low` -> `95% CI - Low...11`
## • `95% CI - High` -> `95% CI - High...12`
## • `Quartile` -> `Quartile...13`
## • `Sample Size` -> `Sample Size...14`
## • `95% CI - Low` -> `95% CI - Low...16`
## • `95% CI - High` -> `95% CI - High...17`
## • `Quartile` -> `Quartile...18`
## • `Sample Size` -> `Sample Size...19`
## • `95% CI - Low` -> `95% CI - Low...21`
## • `95% CI - High` -> `95% CI - High...22`
## • `Quartile` -> `Quartile...23`
## • `95% CI - Low` -> `95% CI - Low...28`
## • `95% CI - High` -> `95% CI - High...29`
## • `Quartile` -> `Quartile...30`
## • `Sample Size` -> `Sample Size...31`
## • `95% CI - Low` -> `95% CI - Low...33`
## • `95% CI - High` -> `95% CI - High...34`
## • `Quartile` -> `Quartile...35`
## • `95% CI - Low` -> `95% CI - Low...37`
## • `95% CI - High` -> `95% CI - High...38`
## • `Quartile` -> `Quartile...39`
## • `Quartile` -> `Quartile...41`
## • `95% CI - Low` -> `95% CI - Low...43`
## • `95% CI - High` -> `95% CI - High...44`
## • `Quartile` -> `Quartile...45`
## • `Quartile` -> `Quartile...48`
## • `Sample Size` -> `Sample Size...49`
## • `95% CI - Low` -> `95% CI - Low...51`
## • `95% CI - High` -> `95% CI - High...52`
## • `Quartile` -> `Quartile...53`
## • `Quartile` -> `Quartile...57`
## • `Quartile` -> `Quartile...60`
## • `95% CI - Low` -> `95% CI - Low...64`
## • `95% CI - High` -> `95% CI - High...65`
## • `Quartile` -> `Quartile...66`
## • `95% CI - Low` -> `95% CI - Low...69`
## • `95% CI - High` -> `95% CI - High...70`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `Quartile` -> `Quartile...79`
## • `# Mental Health Providers` -> `# Mental Health Providers...80`
## • `MHP Rate` -> `MHP Rate...81`
## • `MHP Ratio` -> `MHP Ratio...82`
## • `# Mental Health Providers` -> `# Mental Health Providers...83`
## • `MHP Rate` -> `MHP Rate...84`
## • `MHP Ratio` -> `MHP Ratio...85`
## • `Quartile` -> `Quartile...86`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...87`
## • `95% CI - Low` -> `95% CI - Low...89`
## • `95% CI - High` -> `95% CI - High...90`
## • `Quartile` -> `Quartile...91`
## • `95% CI - Low` -> `95% CI - Low...94`
## • `95% CI - High` -> `95% CI - High...95`
## • `Quartile` -> `Quartile...96`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...97`
## • `95% CI - Low` -> `95% CI - Low...99`
## • `95% CI - High` -> `95% CI - High...100`
## • `Quartile` -> `Quartile...101`
## • `Quartile` -> `Quartile...104`
## • `95% CI - Low` -> `95% CI - Low...108`
## • `95% CI - High` -> `95% CI - High...109`
## • `Quartile` -> `Quartile...110`
## • `Quartile` -> `Quartile...114`
## • `95% CI - Low` -> `95% CI - Low...117`
## • `95% CI - High` -> `95% CI - High...118`
## • `Quartile` -> `Quartile...119`
## • `Quartile` -> `Quartile...123`
## • `95% CI - Low` -> `95% CI - Low...127`
## • `95% CI - High` -> `95% CI - High...128`
## • `Quartile` -> `Quartile...129`
## • `Quartile` -> `Quartile...132`
## • `Quartile` -> `Quartile...135`
## • `95% CI - Low` -> `95% CI - Low...138`
## • `95% CI - High` -> `95% CI - High...139`
## • `Quartile` -> `Quartile...140`
## • `Quartile` -> `Quartile...142`
## • `Quartile` -> `Quartile...145`
## • `95% CI - Low` -> `95% CI - Low...148`
## • `95% CI - High` -> `95% CI - High...149`
## • `Quartile` -> `Quartile...150`
## • `95% CI - Low` -> `95% CI - Low...154`
## • `95% CI - High` -> `95% CI - High...155`
## • `Quartile` -> `Quartile...156`
## • `95% CI - Low` -> `95% CI - Low...159`
## • `95% CI - High` -> `95% CI - High...160`
## • `Quartile` -> `Quartile...161`
## Warning: Expecting numeric in BX3145 / R3145C76: got '^This data was updated on March 29, 2017. Please see http://www.countyhealthrankings.org/content/data-changes for more information.'
## New names:
## New names:
## • `Quartile` -> `Quartile...8`
## • `95% CI - Low` -> `95% CI - Low...10`
## • `95% CI - High` -> `95% CI - High...11`
## • `Quartile` -> `Quartile...12`
## • `95% CI - Low` -> `95% CI - Low...14`
## • `95% CI - High` -> `95% CI - High...15`
## • `Quartile` -> `Quartile...16`
## • `95% CI - Low` -> `95% CI - Low...18`
## • `95% CI - High` -> `95% CI - High...19`
## • `Quartile` -> `Quartile...20`
## • `95% CI - Low` -> `95% CI - Low...25`
## • `95% CI - High` -> `95% CI - High...26`
## • `Quartile` -> `Quartile...27`
## • `95% CI - Low` -> `95% CI - Low...29`
## • `95% CI - High` -> `95% CI - High...30`
## • `Quartile` -> `Quartile...31`
## • `95% CI - Low` -> `95% CI - Low...33`
## • `95% CI - High` -> `95% CI - High...34`
## • `Quartile` -> `Quartile...35`
## • `Quartile` -> `Quartile...37`
## • `95% CI - Low` -> `95% CI - Low...39`
## • `95% CI - High` -> `95% CI - High...40`
## • `Quartile` -> `Quartile...41`
## • `Quartile` -> `Quartile...43`
## • `95% CI - Low` -> `95% CI - Low...45`
## • `95% CI - High` -> `95% CI - High...46`
## • `Quartile` -> `Quartile...47`
## • `95% CI - Low` -> `95% CI - Low...51`
## • `95% CI - High` -> `95% CI - High...52`
## • `Quartile` -> `Quartile...53`
## • `Quartile` -> `Quartile...56`
## • `95% CI - Low` -> `95% CI - Low...60`
## • `95% CI - High` -> `95% CI - High...61`
## • `Quartile` -> `Quartile...62`
## • `95% CI - Low` -> `95% CI - Low...65`
## • `95% CI - High` -> `95% CI - High...66`
## • `Quartile` -> `Quartile...67`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `# Mental Health Providers` -> `# Mental Health Providers...76`
## • `MHP Rate` -> `MHP Rate...77`
## • `MHP Ratio` -> `MHP Ratio...78`
## • `# Mental Health Providers` -> `# Mental Health Providers...79`
## • `MHP Rate` -> `MHP Rate...80`
## • `MHP Ratio` -> `MHP Ratio...81`
## • `Quartile` -> `Quartile...82`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...83`
## • `95% CI - Low` -> `95% CI - Low...85`
## • `95% CI - High` -> `95% CI - High...86`
## • `Quartile` -> `Quartile...87`
## • `95% CI - Low` -> `95% CI - Low...90`
## • `95% CI - High` -> `95% CI - High...91`
## • `Quartile` -> `Quartile...92`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...93`
## • `95% CI - Low` -> `95% CI - Low...95`
## • `95% CI - High` -> `95% CI - High...96`
## • `Quartile` -> `Quartile...97`
## • `Quartile` -> `Quartile...100`
## • `95% CI - Low` -> `95% CI - Low...104`
## • `95% CI - High` -> `95% CI - High...105`
## • `Quartile` -> `Quartile...106`
## • `Quartile` -> `Quartile...110`
## • `95% CI - Low` -> `95% CI - Low...113`
## • `95% CI - High` -> `95% CI - High...114`
## • `Quartile` -> `Quartile...115`
## • `Quartile` -> `Quartile...119`
## • `95% CI - Low` -> `95% CI - Low...123`
## • `95% CI - High` -> `95% CI - High...124`
## • `Quartile` -> `Quartile...125`
## • `Quartile` -> `Quartile...128`
## • `Quartile` -> `Quartile...131`
## • `95% CI - Low` -> `95% CI - Low...134`
## • `95% CI - High` -> `95% CI - High...135`
## • `Quartile` -> `Quartile...136`
## • `Quartile` -> `Quartile...138`
## • `Quartile` -> `Quartile...140`
## • `95% CI - Low` -> `95% CI - Low...143`
## • `95% CI - High` -> `95% CI - High...144`
## • `Quartile` -> `Quartile...145`
## • `95% CI - Low` -> `95% CI - Low...149`
## • `95% CI - High` -> `95% CI - High...150`
## • `Quartile` -> `Quartile...151`
## • `95% CI - Low` -> `95% CI - Low...154`
## • `95% CI - High` -> `95% CI - High...155`
## • `Quartile` -> `Quartile...156`
## Warning: Expecting logical in CY2096 / R2096C103: got '*'
## Warning: Expecting logical in CY2125 / R2125C103: got '*'
## Warning: Expecting logical in CY3145 / R3145C103: got '* An error was found in this data, and corrected data is not currently available.'
## New names:
## New names:
## • `Quartile` -> `Quartile...7`
## • `95% CI - Low` -> `95% CI - Low...12`
## • `95% CI - High` -> `95% CI - High...13`
## • `Quartile` -> `Quartile...14`
## • `95% CI - Low` -> `95% CI - Low...16`
## • `95% CI - High` -> `95% CI - High...17`
## • `Quartile` -> `Quartile...18`
## • `95% CI - Low` -> `95% CI - Low...20`
## • `95% CI - High` -> `95% CI - High...21`
## • `Quartile` -> `Quartile...22`
## • `95% CI - Low` -> `95% CI - Low...25`
## • `95% CI - High` -> `95% CI - High...26`
## • `Quartile` -> `Quartile...27`
## • `95% CI - Low` -> `95% CI - Low...32`
## • `95% CI - High` -> `95% CI - High...33`
## • `Quartile` -> `Quartile...34`
## • `95% CI - Low` -> `95% CI - Low...36`
## • `95% CI - High` -> `95% CI - High...37`
## • `Quartile` -> `Quartile...38`
## • `Quartile` -> `Quartile...40`
## • `95% CI - Low` -> `95% CI - Low...42`
## • `95% CI - High` -> `95% CI - High...43`
## • `Quartile` -> `Quartile...44`
## • `Quartile` -> `Quartile...46`
## • `95% CI - Low` -> `95% CI - Low...48`
## • `95% CI - High` -> `95% CI - High...49`
## • `Quartile` -> `Quartile...50`
## • `95% CI - Low` -> `95% CI - Low...54`
## • `95% CI - High` -> `95% CI - High...55`
## • `Quartile` -> `Quartile...56`
## • `Quartile` -> `Quartile...59`
## • `95% CI - Low` -> `95% CI - Low...61`
## • `95% CI - High` -> `95% CI - High...62`
## • `Quartile` -> `Quartile...63`
## • `95% CI - Low` -> `95% CI - Low...69`
## • `95% CI - High` -> `95% CI - High...70`
## • `Quartile` -> `Quartile...71`
## • `Quartile` -> `Quartile...75`
## • `Quartile` -> `Quartile...79`
## • `Quartile` -> `Quartile...83`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...84`
## • `95% CI - Low` -> `95% CI - Low...86`
## • `95% CI - High` -> `95% CI - High...87`
## • `Quartile` -> `Quartile...88`
## • `95% CI - Low` -> `95% CI - Low...91`
## • `95% CI - High` -> `95% CI - High...92`
## • `Quartile` -> `Quartile...93`
## • `# Medicare Enrollees` -> `# Medicare Enrollees...96`
## • `95% CI - Low` -> `95% CI - Low...98`
## • `95% CI - High` -> `95% CI - High...99`
## • `Quartile` -> `Quartile...100`
## • `Quartile` -> `Quartile...106`
## • `95% CI - Low` -> `95% CI - Low...110`
## • `95% CI - High` -> `95% CI - High...111`
## • `Quartile` -> `Quartile...112`
## • `Quartile` -> `Quartile...116`
## • `95% CI - Low` -> `95% CI - Low...118`
## • `95% CI - High` -> `95% CI - High...119`
## • `Quartile` -> `Quartile...120`
## • `Quartile` -> `Quartile...127`
## • `95% CI - Low` -> `95% CI - Low...131`
## • `95% CI - High` -> `95% CI - High...132`
## • `Quartile` -> `Quartile...133`
## • `Quartile` -> `Quartile...136`
## • `Quartile` -> `Quartile...139`
## • `95% CI - Low` -> `95% CI - Low...142`
## • `95% CI - High` -> `95% CI - High...143`
## • `Quartile` -> `Quartile...144`
## • `Quartile` -> `Quartile...146`
## • `Quartile` -> `Quartile...148`
## • `95% CI - Low` -> `95% CI - Low...151`
## • `95% CI - High` -> `95% CI - High...152`
## • `Quartile` -> `Quartile...153`
## • `95% CI - Low` -> `95% CI - Low...155`
## • `95% CI - High` -> `95% CI - High...156`
## • `Quartile` -> `Quartile...157`
## • `95% CI - Low` -> `95% CI - Low...163`
## • `95% CI - High` -> `95% CI - High...164`
## • `Quartile` -> `Quartile...165`
RMD$year=as.integer(RMD$year)
RMD=RMD %>% filter(substring(FIPS, 3,5)!="000")
RMD=RMD %>% drop_na(FIPS)
RMDwide= pivot_wider(RMD, id_cols = FIPS,  names_from = year, values_from = c(YPLLRateLow, YPLLRateHigh))

RMDwide$range15=(RMDwide$YPLLRateHigh_2015 - RMDwide$YPLLRateLow_2015)/(4*1.96)
RMDwide$irange15=1/RMDwide$range15
RMDwide$weight15=RMDwide$irange15 / sum(RMDwide$irange15, na.rm = TRUE)
sum(RMDwide$weight15 , na.rm = TRUE)
## [1] 1
RMDwide$range16=(RMDwide$YPLLRateHigh_2016 - RMDwide$YPLLRateLow_2016)/(4*1.96)
RMDwide$irange16=1/RMDwide$range16
RMDwide$weight16=RMDwide$irange16 / sum(RMDwide$irange16, na.rm = TRUE)
sum(RMDwide$weight16, na.rm = TRUE)
## [1] 1
RMDwide$range17=(RMDwide$YPLLRateHigh_2017 - RMDwide$YPLLRateLow_2017)/(4*1.96)
RMDwide$irange17=1/RMDwide$range17
RMDwide$weight17=RMDwide$irange17 / sum(RMDwide$irange17, na.rm = TRUE)
sum(RMDwide$weight17, na.rm = TRUE)
## [1] 1
RMDwide$range18=(RMDwide$YPLLRateHigh_2018 - RMDwide$YPLLRateLow_2018)/(4*1.96)
RMDwide$irange18=1/RMDwide$range18
RMDwide$weight18=RMDwide$irange18 / sum(RMDwide$irange18, na.rm = TRUE)
sum(RMDwide$weight18, na.rm = TRUE)
## [1] 1
RMDwide$range19=(RMDwide$YPLLRateHigh_2019 - RMDwide$YPLLRateLow_2019)/(4*1.96)
RMDwide$irange19=1/RMDwide$range19
RMDwide$weight19=RMDwide$irange19 / sum(RMDwide$irange19, na.rm = TRUE)
sum(RMDwide$weight19, na.rm = TRUE)
## [1] 1
weight=RMDwide[c(1, 14, 17, 20, 23, 26)]
weight[is.na(weight)] = 0
weight$averweight=dim(weight)[1]*(weight$weight15 + weight$weight16 + weight$weight17 + weight$weight18 + weight$weight19)/5
weight=weight[c(1, 7)]
save(weight, file = "weight.RData")

Perform merges:

## Warning: One or more parsing issues, see `problems()` for details

Tune xgbTree:

load(file = "YPLLanalytic.Rdata")

nc = parallel::detectCores()  
cl = makePSOCKcluster(nc-1)   # Set number of cores equal to machine number minus one
registerDoParallel(cl)        #Set up parallel

fitControl = trainControl(method = "repeatedcv",
                          number = 10,
                          repeats = 10,
                          allowParallel=TRUE
)

#Step 1: find range of nrounds associated with different learning rates with non-stochastic trees, default max_depth 6, min_child_weight 20

# xgbGrid1 = expand.grid(nrounds = c(1,5,10,20,50,100,150,200,250,300,350,400,450,500,550,600,650,700,750,800,850,900,950,1000,1100,1200,1300,1400,1500), 
#                        max_depth = 6, 
#                        eta = c(.0025,.005,.01,.02,.04,.08,.16,.32), 
#                        gamma = 0,
#                        colsample_bytree = 1, 
#                        min_child_weight = 20,
#                        subsample = 1) 
# 
# set.seed (112358)
# xgbFit1 = train(YPLLdif ~ ., data = YPLLanalytic,
#                 method = "xgbTree", 
#                 trControl = fitControl,
#                 tuneGrid = xgbGrid1,
#                 weights = analyticwrace$averweight,
#                 nthread=1,
#                 verbosity = 0
# )
# 
# plot(xgbFit1)
# xgbFit1$bestTune
# 
# #Step 2: We've ruled out fast learning rates. .005 and .01 are approximately the same. Next, allow for stochastic variation and see if range still works
# 
# xgbGrid2 = expand.grid(nrounds = seq(100,1000,100), 
#                        max_depth = 6, 
#                        eta = c(.005,.01), 
#                        gamma = 0,
#                        colsample_bytree = c(.2,.5,.8,1), 
#                        min_child_weight = 20,
#                        subsample = c(.2,.5,.8,1)) 
# 
# set.seed (112358)
# xgbFit2 = train(YPLLdif ~ ., data = YPLLanalytic,
#                 method = "xgbTree", 
#                 trControl = fitControl,
#                 tuneGrid = xgbGrid2,
#                 weights = analyticwrace$averweight,
#                 nthread=1,
#                 verbosity = 0
# )
# 
# plot(xgbFit2)
# xgbFit2$bestTune
# 
# #Step 3: eta = .005 confirmed around 600 trees; approx subsample .2, colsample .5; now vary depth
# 
# xgbGrid3 = expand.grid(nrounds = seq(400,1200,50), 
#                        max_depth = c(3,4,5,6,7,8,9), 
#                        eta = .005, 
#                        gamma = 0,
#                        colsample_bytree = c(.4,.5,.6,.7), 
#                        min_child_weight = 20,
#                        subsample = c(.1,.2,.3)) 
# 
# set.seed (112358)
# xgbFit3 = train(YPLLdif ~ ., data = YPLLanalytic,
#                 method = "xgbTree", 
#                 trControl = fitControl,
#                 tuneGrid = xgbGrid3,
#                 weights = analyticwrace$averweight,
#                 nthread=1,
#                 verbosity = 0
# )
# 
# plot(xgbFit3)
# xgbFit3$bestTune

#Step 4: final fine-tuning / subsample .2, colsample .5, depth 7, around 600 trees

xgbGridFinal = expand.grid(nrounds = seq(500,700,2), 
                       max_depth = 7, 
                       eta = .005, 
                       gamma = 0,
                       colsample_bytree = .5, 
                       min_child_weight = 20,
                       subsample = .2) 

set.seed (112358)
xgbFitFinal = train(YPLLdif ~ ., data = YPLLanalytic,
                method = "xgbTree", 
                trControl = fitControl,
                tuneGrid = xgbGridFinal,
                weights = analyticwrace$averweight,
                nthread=1,
                verbosity = 0
)

plot(xgbFitFinal)

xgbFitFinal$bestTune
##    nrounds max_depth   eta gamma colsample_bytree min_child_weight subsample
## 39     576         7 0.005     0              0.5               20       0.2
stopCluster(cl)

save(xgbFitFinal,file="xgbFitFinal.Rdata")

Results:

load(file="YPLLanalytic.Rdata")
load(file = "xgbFitFinal.Rdata")

plot(varImp(xgbFitFinal))

vi=varImp(xgbFitFinal)
vi$importance
##                  Overall
## obesitya      100.000000
## inactivitya    88.772228
## somecollegea   88.168302
## PCPRateb       60.578163
## inactivityb    58.391528
## childpovertya  57.927225
## hpnon          57.030086
## highschoola    55.004340
## uninsureda     53.720164
## highschoolb    50.112562
## PCPRatea       49.386681
## unemploymenta  46.582051
## somecollegeb   43.606689
## blwt           39.858873
## obesityb       39.850189
## childpovertyb  19.422004
## uninsuredb      8.052255
## unemploymentb   0.000000
xgbFitFinal
## eXtreme Gradient Boosting 
## 
## 2509 samples
##   18 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 2259, 2258, 2258, 2258, 2259, 2258, ... 
## Resampling results across tuning parameters:
## 
##   nrounds  RMSE      Rsquared    MAE     
##   500      1089.851  0.02950927  779.6507
##   502      1089.824  0.02953262  779.6333
##   504      1089.795  0.02954632  779.6084
##   506      1089.777  0.02955068  779.5970
##   508      1089.726  0.02960608  779.5757
##   510      1089.728  0.02955611  779.5796
##   512      1089.709  0.02956211  779.5730
##   514      1089.687  0.02957507  779.5708
##   516      1089.670  0.02957168  779.5568
##   518      1089.671  0.02955039  779.5548
##   520      1089.661  0.02953098  779.5451
##   522      1089.635  0.02955591  779.5312
##   524      1089.611  0.02957802  779.5247
##   526      1089.563  0.02963837  779.4881
##   528      1089.562  0.02961942  779.4907
##   530      1089.548  0.02963361  779.4794
##   532      1089.526  0.02964628  779.4655
##   534      1089.499  0.02965325  779.4498
##   536      1089.471  0.02968219  779.4234
##   538      1089.476  0.02965969  779.4225
##   540      1089.459  0.02968307  779.4011
##   542      1089.448  0.02970502  779.4051
##   544      1089.433  0.02971258  779.3936
##   546      1089.453  0.02965811  779.4043
##   548      1089.409  0.02971190  779.3666
##   550      1089.399  0.02970951  779.3552
##   552      1089.363  0.02975816  779.3274
##   554      1089.370  0.02973438  779.3402
##   556      1089.356  0.02974686  779.3198
##   558      1089.325  0.02979144  779.2891
##   560      1089.318  0.02978840  779.2858
##   562      1089.313  0.02978528  779.2697
##   564      1089.297  0.02979120  779.2574
##   566      1089.269  0.02983674  779.2354
##   568      1089.269  0.02983018  779.2329
##   570      1089.248  0.02984857  779.2270
##   572      1089.228  0.02986549  779.2204
##   574      1089.206  0.02989790  779.1958
##   576      1089.177  0.02993063  779.1761
##   578      1089.191  0.02990882  779.1823
##   580      1089.193  0.02990012  779.1823
##   582      1089.207  0.02985788  779.2069
##   584      1089.210  0.02983693  779.2141
##   586      1089.198  0.02987002  779.2167
##   588      1089.205  0.02985623  779.2309
##   590      1089.216  0.02983025  779.2398
##   592      1089.216  0.02982504  779.2428
##   594      1089.203  0.02983931  779.2392
##   596      1089.207  0.02982265  779.2540
##   598      1089.201  0.02982628  779.2407
##   600      1089.214  0.02980098  779.2501
##   602      1089.235  0.02976431  779.2631
##   604      1089.264  0.02970339  779.2806
##   606      1089.252  0.02972357  779.2840
##   608      1089.253  0.02971334  779.2787
##   610      1089.263  0.02969112  779.2887
##   612      1089.240  0.02971430  779.2803
##   614      1089.248  0.02969926  779.2713
##   616      1089.237  0.02971580  779.2594
##   618      1089.236  0.02971987  779.2547
##   620      1089.252  0.02968986  779.2692
##   622      1089.266  0.02966190  779.2846
##   624      1089.268  0.02966561  779.2881
##   626      1089.277  0.02965476  779.2996
##   628      1089.257  0.02969646  779.3043
##   630      1089.246  0.02970701  779.2985
##   632      1089.260  0.02967930  779.3208
##   634      1089.298  0.02962410  779.3465
##   636      1089.303  0.02960992  779.3552
##   638      1089.296  0.02962982  779.3417
##   640      1089.265  0.02967679  779.3261
##   642      1089.271  0.02966479  779.3428
##   644      1089.289  0.02966576  779.3714
##   646      1089.293  0.02966442  779.3724
##   648      1089.306  0.02966532  779.3708
##   650      1089.316  0.02965095  779.3774
##   652      1089.299  0.02969356  779.3580
##   654      1089.306  0.02969807  779.3554
##   656      1089.323  0.02966414  779.3681
##   658      1089.330  0.02966562  779.3846
##   660      1089.361  0.02961481  779.4151
##   662      1089.361  0.02961098  779.4247
##   664      1089.366  0.02960398  779.4407
##   666      1089.353  0.02962163  779.4277
##   668      1089.346  0.02963284  779.4290
##   670      1089.333  0.02964888  779.4244
##   672      1089.342  0.02963171  779.4353
##   674      1089.319  0.02967121  779.4352
##   676      1089.338  0.02963037  779.4488
##   678      1089.320  0.02964777  779.4368
##   680      1089.316  0.02965525  779.4337
##   682      1089.338  0.02962640  779.4484
##   684      1089.357  0.02959830  779.4575
##   686      1089.359  0.02960974  779.4652
##   688      1089.365  0.02960742  779.4639
##   690      1089.365  0.02961497  779.4588
##   692      1089.380  0.02959577  779.4729
##   694      1089.391  0.02959433  779.4730
##   696      1089.397  0.02959123  779.4883
##   698      1089.396  0.02960269  779.4876
##   700      1089.404  0.02958937  779.4925
## 
## Tuning parameter 'max_depth' was held constant at a value of 7
## Tuning parameter 'eta' was held constant at a value of 0.005
## Tuning parameter 'gamma' was held constant at a value of 0
## Tuning parameter 'colsample_bytree' was held constant at a value of 0.5
## Tuning parameter 'min_child_weight' was
##  held constant at a value of 20
## Tuning parameter 'subsample' was held constant at a value of 0.2
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nrounds = 576, max_depth = 7, eta = 0.005, gamma = 0, colsample_bytree = 0.5, min_child_weight = 20 and subsample = 0.2.
xgb.pdp = list()
res.partialplot = list()
predvarls = rownames(varImp(xgbFitFinal)$importance)

for (m in 1:length(predvarls)){
  xgb.pdp[[m]] = 
    partial(
      object = xgbFitFinal,
      pred.var = predvarls[[m]],
      plot = FALSE,
      chull = TRUE,
      plot.engine = "ggplot2"
    )
  res.partialplot[[m]] = plotPartial(xgb.pdp[[m]], rug =TRUE, train = YPLLanalytic, ylim=c(99, 601))
}

for(j in 1:length(predvarls)){
  print(res.partialplot[[j]])
}

library(sfsmisc)
## 
## Attaching package: 'sfsmisc'
## The following object is masked from 'package:Hmisc':
## 
##     errbar
## The following object is masked from 'package:dplyr':
## 
##     last
library(usmap)

res_e = numeric(length = length(predvarls))
res_semi_e = numeric(length = length(predvarls))
county_e = matrix(data = NA,nrow = dim(YPLLanalytic)[1],ncol = length(predvarls))
res.elastplot = list()
res.semi_elastplot = list()
goodlist = c("somecollegea","somecollegeb","PCPratea","PCPrateb","highschoola","highschoolb") #Variables where increase is hypothesized to reduce YPLLdif

for (j in 1:length(predvarls)) {
  sm_pdp_j = smooth.spline(x = xgb.pdp[[j]][, 1], y = xgb.pdp[[j]][, 2], df = 5) #Smooth partial dependency of YPLL on predictor j
  d_pdp_d_j = D1tr(y = sm_pdp_j$y, x = sm_pdp_j$x) #Derivative of smoothed partial dependency of YPLL on predictor j
  e_pdp_j = (sm_pdp_j$x / sm_pdp_j$y) * d_pdp_d_j #Elasticity of YPLL with respect to predictor j
  plot(
    sm_pdp_j$x,
    e_pdp_j,
    xlab = predvarls[j],
    ylab = "Elasticity of YPLLdif",
    ylim = c(-1, 1)
  )
  interp.elast = approxfun(x = sm_pdp_j$x, y = e_pdp_j)
  res_e[j] = weighted.mean(x = interp.elast(YPLLanalytic[, predvarls[j]]), w =
                             analyticwrace$averweight)
  county_e[, j] = interp.elast(YPLLanalytic[, predvarls[j]])
  etest = data.frame(fips = analyticwrace$FIPS, elast = county_e[, j])
  # if (predvarls[j] %in% goodlist) {
  #   semi_e_j = -county_e[, j] * abs(YPLLanalytic$YPLLdif)
  # } else semi_e_j = county_e[, j] * abs(YPLLanalytic$YPLLdif)
  
  semi_e_j = county_e[, j] * abs(YPLLanalytic$YPLLdif)
  res_semi_e[j]=weighted.mean(x = county_e[, j] * abs(YPLLanalytic$YPLLdif), w =
                             analyticwrace$averweight)
  
  semi_etest = data.frame(fips = analyticwrace$FIPS, semi_elast = semi_e_j)
  res.elastplot[[j]] = plot_usmap(regions = "counties",
                                  data = etest,
                                  values = "elast") + scale_fill_gradientn(
                                    colours = c("darkgreen", "green", "white", "red", "darkred"),
                                    na.value = "gray",
                                    breaks = c(-1,-.5, 0, .5, 1),
                                    labels = c(-1,-.5, 0, .5, 1),
                                    limits = c(-1.2, 1.2)
                                  ) + ggtitle(predvarls[j])
  print(res.elastplot[[j]])
  res.semi_elastplot[[j]] = plot_usmap(regions = "counties",
                                       data = semi_etest,
                                       values = "semi_elast") + scale_fill_gradientn(
                                         colours = c("darkgreen", "green", "white", "red", "darkred"),
                                         na.value = "gray",
                                         breaks = c(-1500,-750,-375, 0, 375, 750, 1500),
                                         labels = c(-1500,-750,-375, 0, 375, 750, 1500),
                                         limits = c(-3000, 3000)
                                       )+ ggtitle(predvarls[j])
  print(res.semi_elastplot[[j]])
}
## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

## Warning: Ignoring unknown parameters: linewidth

print(cbind(predvarls,res_e)) #Overall elasticity results
##       predvarls       res_e                 
##  [1,] "obesitya"      "0.722628703573935"   
##  [2,] "inactivitya"   "0.370187562607025"   
##  [3,] "somecollegea"  "-0.259859798699842"  
##  [4,] "PCPRateb"      "-0.00423301209769754"
##  [5,] "inactivityb"   "-0.00427052706075375"
##  [6,] "childpovertya" "0.187478719322619"   
##  [7,] "hpnon"         "0.0201177261337519"  
##  [8,] "highschoola"   "-0.0400321326226932" 
##  [9,] "uninsureda"    "-0.239355491732303"  
## [10,] "highschoolb"   "-0.0112087380758103" 
## [11,] "PCPRatea"      "0.0399755131094835"  
## [12,] "unemploymenta" "0.277642883561138"   
## [13,] "somecollegeb"  "-0.0526149802345273" 
## [14,] "blwt"          "-0.00704662253025246"
## [15,] "obesityb"      "0.037059867239215"   
## [16,] "childpovertyb" "-0.0104389801824501" 
## [17,] "uninsuredb"    "0.00380738793305452" 
## [18,] "unemploymentb" "0.0150438495678506"
print(cbind(predvarls,res_semi_e)) #semi elasticity results
##       predvarls       res_semi_e         
##  [1,] "obesitya"      "522.65128400551"  
##  [2,] "inactivitya"   "241.663163605211" 
##  [3,] "somecollegea"  "-138.658803621263"
##  [4,] "PCPRateb"      "-2.43694527315993"
##  [5,] "inactivityb"   "-2.06347207566088"
##  [6,] "childpovertya" "132.046718025043" 
##  [7,] "hpnon"         "15.6972640274671" 
##  [8,] "highschoola"   "-27.1175971504716"
##  [9,] "uninsureda"    "-166.978743556677"
## [10,] "highschoolb"   "-7.51616137709872"
## [11,] "PCPRatea"      "24.2266195080033" 
## [12,] "unemploymenta" "183.322017382117" 
## [13,] "somecollegeb"  "-42.7653873332059"
## [14,] "blwt"          "-7.71637768114674"
## [15,] "obesityb"      "28.4553443468538" 
## [16,] "childpovertyb" "-7.34847169099488"
## [17,] "uninsuredb"    "2.25357146884707" 
## [18,] "unemploymentb" "9.94443943601385"